####################################################################################
####################################################################################
#
#             R E P L I C A T I O N   C O D E 
#
#             for 
#
#             "No Direct Taxation Without New Elite Representation Industrialization 
#             and the Domestic Politics of Taxation" EJPR
#
#             Emmenegger, Leemann, and Walter 2020
#
####################################################################################
####################################################################################


library(lmtest)       # CRAN v0.9-37
library(multiwayvcov) # CRAN v1.2.3 
library(dplyr)        # CRAN v0.8.5
library(texreg)       # CRAN v1.36.23
library(arm)          # CRAN v1.11-1
library(sandwich)     # CRAN v2.5-1
library(car)          # CRAN v3.0-8
library(ggplot2)      # CRAN v3.3.0
library(tidyr)        # CRAN v1.1.0
library(readxl)       # CRAN v1.3.1
library(ineq)         # CRAN v0.2-13
library(extrafont)    # CRAN v0.17 
library(reshape2)     # CRAN v1.4.4
font_import()
font_install('fontcm')

rm(list=ls())

load("Data/analysis_data.RData")
load("Data/analysis_data_datwint.RData")
#load("Data/analysis_data_LA.RData")

# muni: Number of Municipalities
# suffmax: Number of categories suffrage restrictions
# suffoekmax: Number of categories suffrage restrictions (economic groups only)
# suffoek: Do suffrage restrictions (economic groups only) exist; yes 1, no 0 
# dir_sh: Share direct taxes
# fed_pc: Share federal transfers
# def_sh: Share deficits
# indir_sh: Share indirect taxes
# lux_sh: Share taxes on luxury goods

#########################################################################################################
#########################################################################################################

#       DIR_PC

#########################################################################################################
#########################################################################################################

datwint$competition <- -datwint$herf
datwint$nonagr <- 1-datwint$fir_sh
datwintoLG <- subset(datwint, landsgemeinde !=1)
datwint$ddir_pc <- datwint$dir_pc-datwint$ldir_pc

datwint <- datwint %>% 
  group_by(canton) %>% 
  mutate(fdir_pc = dplyr::lead(dir_pc))

datwint$privateBanken_pc <- datwint$totalBanken_pc - datwint$KB_pc

datwint$poverty <- datwint$babydeathratio_impute

#########################################################################################################
# Compete Increase


datwint <- datwint[datwint$year<1911,]
datwintoLG <- datwintoLG[datwintoLG$year<1911,]

#########################################################################################################
# Some manipulation

lefty <- datwint %>% group_by(year) %>% summarize(leftm = mean(left, na.rm = T))
lefty %>% print(n = Inf)


findat$non_direct <- 1- findat$dir_sh
findat$non_dir_total <- findat$Total * findat$non_direct / findat$cpi
findat$non_dir_total_pc <- findat$non_dir_total/findat$pop

#########################################################################################################
#########################################################################################################
# TABLE 1 in MS
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod1 <- lm(dir_pc~factor(year) + factor(canton)+ldir_pc + def_pc +suffoek+ nonagr + ineqcow+ fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod1)$r.squared)
mod.1 <-  coeftest(mod1, vcov = vcovHAC(mod1, type="HC3"))

mod4 <- lm(dir_pc~factor(year) + factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi + nonagr+ competition + left + ineqcow+ fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod5 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~ factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi*nonagr+ competition + left + ineqcow + fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod6 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+  left + ineqcow+ fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+competition*nonagr +hddi*nonagr+ left + ineqcow + fed_pc + printed.records   , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8.1 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

screenreg(list(mod.1,mod5,mod8,mod6,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), reorder.coef = c(9,8,4,11,12,1,2,3,10,5,6,7))

texreg(list(mod.1, mod5,mod8,mod6,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 0","Model 1","Model 2","Model 3","Model 4"), reorder.coef = c(9,8,4,11,12,1,2,3,10,5,6,7),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Suffrage Restrictions}","\\sc{Share Non-Agricultural Sector}",
                            "\\sc{Rural Inequality}", "\\sc{Federal Payments (p.c.)}", "\\sc{Printed Documents}","\\sc{Direct Democracy}",
                            "\\sc{Political Competition}","\\sc{Share Left}","\\sc{Pol. Competition $\\times$  \\% Non-Agr}",
                            "\\sc{Direct Democracy$\\times$ \\% Non-Agr}"))

round(r.sq,3)




#########################################################################################################
#########################################################################################################
# TABLE 2 in MS
#########################################################################################################
#########################################################################################################


#########################################################################################################
# Education

mod4 <- lm(edu_pc~factor(year)+factor(canton)+ledu_pc + def_pc + dir_pc + fed_pc + babydeathratio_impute , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod27 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))


texreg(list(mod27),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 5"), reorder.coef = c(3,4,5,2,1),
       custom.coef.names =c("\\sc{Lag Educ Spending (p.c.)}","\\sc{Deficit p.c.}","\\sc{Direct Taxation (p.c.)}", 
                            "\\sc{Federal Payments (p.c.)}","\\sc{Poverty}"))





#########################################################################################################
#########################################################################################################
# Figure 1 in MS
#########################################################################################################
#########################################################################################################



data1 <- read_xls("Data/Tax Introduction Years.xls")


data1 <- data.frame(as.matrix(data1))
data1[,2] <- as.numeric(as.character(data1[,2]))
data1[,3] <- as.numeric(as.character(data1[,3]))


yearsL <- 1848:1910
verm.counter <- rep(NA,length(yearsL))
eink.counter <- rep(NA,length(yearsL))

for (i in 1:length(yearsL)){
  verm.counter[i] <-   length(which(data1[,2] <=yearsL[i]))/25
  eink.counter[i] <-   length(which(data1[,3] <= yearsL[i]))/25
}

par(family="CMU Serif")
plot(c(1848:1910), eink.counter, ylim=c(0,1), type="b", bty="n", col="blue", xlab="", 
     ylab="Share of Cantons with this Tax", lwd=3, cex=0.5, xlim=c(1847,1911), xaxt="n")
axis(1,at = c(1848,1870,1890,1910))
points(c(1848:1910), verm.counter, type="b", col="red", lwd=3, cex=0.5)

legend(1900,.2, c("Income Taxation","Wealth Taxation"), col=c("blue","red"), pch=19,bty="n")





#########################################################################################################
#########################################################################################################
# Figure 2 in MS
#########################################################################################################
#########################################################################################################

mean.object <- matrix(NA,length(unique(findat$year)),2)
mean.object[,1] <- unique(findat$year)
for (i in 1:101){
  mean.object[i,2] <- mean(datwint$dir_pc[datwint$year==(i+1829)], na.rm = TRUE)
}
mean.object1 <- mean.object[!is.na(mean.object[,2]),]

par(mfrow=c(5,5),family="CMU Serif")
for (i in 1:25){
  plot(findat$year[findat$cantonnr==i][21:81],findat$dir_pc[findat$cantonnr==i][21:81], 
       type="l", bty="n", xlab="Year", ylab="DT pc",
       col="purple", lwd=2)#, ylim=c(0,750))
  points(mean.object1[,1], mean.object1[,2], 
         col="darkgrey", lty=2, type="l", lwd=2)
  legend("topleft",legend=paste(unique(findat$canton[findat$cantonnr==i])),bty="n", cex=2)
}




#########################################################################################################
#########################################################################################################
# Figure 3 in MS
#########################################################################################################
#########################################################################################################


par(mfrow=c(1,2), family="CMU Serif")
# HERF
model.plot <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ left + ineqcow+fed_pc+ printed.records, data=datwint)
N.sim <- 1000
BETA <- mvrnorm(N.sim,coef(model.plot), vcov(model.plot))
COMP <- seq(-1,0,length.out = 100)
summary(datwint$competition)
NonA <- seq(0,1,length.out = 100)

marg.pop.dd <- matrix(rep(BETA[,"competition"],100),N.sim,100) *.7  + as.matrix(BETA[,"competition:nonagr"]) %*% t(as.matrix(NonA)) *0.7


plot(NonA,colMeans(marg.pop.dd), type="l", ylab="Marginal Effect of New Elite Political Representation", 
     xlab="Level of Industrialization", col="blue", lwd=2, bty="n")
ci.marg.pop.dd  <- apply(marg.pop.dd,2,quantile,c(0.025,0.975))
points(NonA, ci.marg.pop.dd[1,], type="l", lty=2, col="blue", lwd=2)
points(NonA, ci.marg.pop.dd[2,], type="l", lty=2, col="blue", lwd=2)
abline(h=0, lwd=4, col=rgb(190,190,190,200,maxColorValue = 255))
for (i in 1:N.sim){
  points(NonA,marg.pop.dd[i,], type="l",
         col=rgb(0,0,255,15,maxColorValue = 255))
}
rug(datwint$nonagr, ticksize = 0.02)

## Direct Democracy
model.plot <-lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi*nonagr+ competition + left + ineqcow+fed_pc+ printed.records, data=datwint)

N.sim <- 1000
BETA <- mvrnorm(N.sim,coef(model.plot), vcov(model.plot))
dd <- seq(0,4.5,length.out = 100)
summary(datwint$hddi)
summary(datwint$nonagr)
NonA <- seq(0,1,length.out = 100)

marg.sec.dd <- matrix(rep(BETA[,"hddi"],100),N.sim,100) *1.5 + as.matrix(BETA[,"hddi:nonagr"]) %*% t(as.matrix(NonA)) *1.5


plot(NonA,colMeans(marg.sec.dd), type="l", ylab="Marginal Effect of Extra-Parliamentary Political Competition",
     xlab="Level of Industrialization", col="red", lwd=2, bty="n")
ci.marg.pop.dd  <- apply(marg.sec.dd,2,quantile,c(0.025,0.975))
points(NonA, ci.marg.pop.dd[1,], type="l", lty=2, col="red", lwd=2)
points(NonA, ci.marg.pop.dd[2,], type="l", lty=2, col="red", lwd=2)
abline(h=0, lwd=4, col=rgb(190,190,190,200,maxColorValue = 255))
for (i in 1:N.sim){
  points(NonA,marg.sec.dd[i,], type="l",
         col=rgb(255,0,0,20,maxColorValue = 255))
}
rug(datwint$nonagr, ticksize = 0.02)



dev.off()



#########################################################################################################
#########################################################################################################
# Figure 4 in MS
#########################################################################################################
#########################################################################################################

model.plot <- lm(dir_pc~factor(canton) + factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ hddi*nonagr+ left + ineqcow+fed_pc + printed.records, data=datwint)
N.sim <- 1000
BETA <- mvrnorm(N.sim,coef(model.plot), vcov(model.plot))

nonagrL <- 0.66
a <- (BETA[,"competition"] + BETA[,"competition:nonagr"]*nonagrL)/(1-BETA[,"ldir_pc"])
QQ <- quantile(a, c(0.025,.5,.975))

nonagrL <- 0.79
b <- (BETA[,"competition"] + BETA[,"competition:nonagr"]*nonagrL)/(1-BETA[,"ldir_pc"])
QQ1 <- quantile(b, c(0.025,.5,.975))


par(mfrow=c(2,1), family="CMU Serif")
hist(a, xlim=c(-200,700), breaks=seq(floor(min(a)),ceiling(max(a)),length.out = 100), main="", col=rgb(0,0,255,50,maxColorValue = 255), border="white",
     xlab="Marginal Effect of New Elite Political Representation", ylab="Canton of Zurich")
abline(v=QQ[1], col="blue", lty=2)
abline(v=QQ[3], col="blue", lty=2)
abline(v=QQ[2], col="blue", lty=1, lwd=3)


hist(b, xlim=c(-200,700), breaks=seq(floor(min(b)),ceiling(max(b)),length.out = 100), main="", col=rgb(255,0,0,50,maxColorValue = 255), border="white",
     xlab="Marginal Effect of New Elite Political Representation", ylab="Canton of Neuchatel")
abline(v=QQ1[1], col="red", lty=2)
abline(v=QQ1[3], col="red", lty=2)
abline(v=QQ1[2], col="red", lty=1, lwd=3)

# Specific numbers mentioned in text
QQ
QQ1
zh.lt <- mean(QQ)
sink("zh_lt_cp.tex")
cat(sprintf(as.character(round(zh.lt))), "\n")
sink()

zh.lt1 <- zh.lt * 0.5
sink("zh_lt_cp1.tex")
cat(sprintf(as.character(round(zh.lt1))), "\n")
sink()

ne.lt <- mean(QQ1)
sink("ne_lt_cp.tex")
cat(sprintf(as.character(round(ne.lt))), "\n")
sink()

ne.lt1 <- ne.lt * 0.5
sink("ne_lt_cp1.tex")
cat(sprintf(as.character(round(ne.lt1))), "\n")
sink()





#########################################################################################################
#########################################################################################################
# Figure 5 in MS
#########################################################################################################
#########################################################################################################


model.plot <- lm(dir_pc~factor(canton) + factor(year)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ hddi*nonagr+ left + ineqcow+fed_pc + printed.records, data=datwint)
N.sim <- 1000
BETA <- mvrnorm(N.sim,coef(model.plot), vcov(model.plot))
nonagrL <- 0.66
a <- (BETA[,"hddi"] + BETA[,"hddi:nonagr"]*nonagrL)/(1-BETA[,"ldir_pc"])
QQ <- quantile(a, c(0.025,.5,.975))

nonagrL <- 0.79
b <- (BETA[,"hddi"] + BETA[,"hddi:nonagr"]*nonagrL)/(1-BETA[,"ldir_pc"])
QQ1 <- quantile(b, c(0.025,.5,.975))

par(mfrow=c(2,1), family="CMU Serif")
hist(a,  breaks=seq(floor(min(a)),ceiling(max(a)),length.out = 100), main="", col=rgb(0,0,255,50,maxColorValue = 255), border="white",
     xlab="Marginal Effect of Extra-Parliamentary Political Competition", ylab="Canton of Zurich", xlim=c(-50,200))
abline(v=QQ[1], col="blue", lty=2)
abline(v=QQ[3], col="blue", lty=2)
abline(v=QQ[2], col="blue", lty=1, lwd=3)


hist(b,  breaks=seq(floor(min(b)),ceiling(max(b)),length.out = 100), main="", col=rgb(255,0,0,50,maxColorValue = 255), border="white",
     xlab="Marginal Effect of Extra-Parliamentary Political Competition", ylab="Canton of Neuchatel", xlim=c(-50,200))
abline(v=QQ1[1], col="red", lty=2)
abline(v=QQ1[3], col="red", lty=2)
abline(v=QQ1[2], col="red", lty=1, lwd=3)


zh.lt <- mean(QQ)
sink("zh_lt_dd.tex")
cat(sprintf(as.character(round(zh.lt))), "\n")
sink()

zh.lt1 <- zh.lt * 3
sink("zh_lt_dd1.tex")
cat(sprintf(as.character(round(zh.lt1))), "\n")
sink()

zh.dd.inc <- 100*zh.lt1/233
sink("zh_lt_dd2.tex")
cat(trimws(sprintf(as.character(round(zh.dd.inc)), "\n"), 
           which = c("both", "left", "right")))
sink()


ne.lt <- mean(QQ1)
sink("ne_lt_cp2.tex")
cat(sprintf(as.character(round(ne.lt))), "\n")
sink()

ne.lt1 <- ne.lt * 3
sink("ne_lt_dd3.tex")
cat(sprintf(as.character(round(ne.lt1))), "\n")
sink()









#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#
#
#                                 A P P E N D I X
#
# 
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################





#########################################################################################################
#########################################################################################################
# Section A1 in Appendix
#########################################################################################################
#########################################################################################################



#########################################################################################################
# Monte Carlo Part
#########################################################################################################

negbin.animal.size <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,21)
  for (i in 1:21){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a) / (1-(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1:4])        # corresponds to 1-4 animals
  p2 <- sum(Pr[5:10])       # corresponds to 5 to 10
  p3 <- sum(Pr[11:20])    # corresponds to 11 to 20
  p4 <- 1-sum(Pr[1:20])   # corresponds to 21 and more 
  p <- cbind(p1,p2,p3,p4)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}

A <- c(1:70)/10 
B <- -1 

N.aVal <- length(A)
N.sim <- 50


gini.true <- matrix(NA,N.aVal,N.sim)
gini.est <- matrix(NA,N.aVal,N.sim)

for (k in 1:N.aVal){
  aaa <- A[k]
  for (n in 1:N.sim){
    X <- rnbinom(20000,p=1/(1+exp(-B)),abs(aaa))
    gini.true[k,n] <- ineq(X,type="Gini")
    
    # censured version
    X <- X[-which(X==0)]
    XX <- rep(NA,4)
    XX[1] <- length(which(X>0 & X<5))
    XX[2] <- length(which(X>4 & X<11))
    XX[3] <- length(which(X>10 & X<21))
    XX[4] <- length(which(X>20))
    
    Y <- rep(4, length(X))
    Y[1:XX[1]] <- 1
    Y[(XX[1]+1):(XX[1] + XX[2])] <- 2
    Y[(XX[1] + XX[2]+1):(XX[1] + XX[2]+ XX[3])] <- 3
    
    startval <- c(10,0)
    tryCatch({
      out.mc <- optim(startval,negbin.animal.size, y=Y, hessian = TRUE, 
                      method="L-BFGS-B",  lower=c(0.0000001, -Inf), upper=rep(Inf, 2),
                      control=list(fnscale=-1,trace=15))},
      error=function(e){})
    
    X.sim <- rnbinom(100000,p=1/(1+exp(-out.mc$par[2])),abs(out.mc$par[1]))
    gini.est[k,n] <- ineq(X.sim,type="Gini")
  }  
  print(paste("*********************  Run",k,"von", N.aVal, "********************"))
}



#########################################################################################################
#########################################################################################################
#  Figure A1 in Appendix
#########################################################################################################
#########################################################################################################

i <- c(1:length(A))%%2==0
even <- c(1:length(A))[i]

par(family="CMU Serif")
plot(0,0, bty="n",ylim=c(0.25,0.6), xlim=c(0.25,0.6), ylab="True Measured Gini", xlab="Estimated Gini")
for (i in even){
  points(gini.true[i,],gini.est[i,], col=rgb(0,0,255,50,maxColorValue = 255), cex=.5)  
}




#########################################################################################################
#########################################################################################################
#  Estimation with Farm Animal Data
#########################################################################################################
#########################################################################################################

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# COWS

negbin.animal.size <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,21)
  for (i in 1:21){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a) / (1-(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1:4])        # corresponds to 1-4 animals
  p2 <- sum(Pr[5:10])       # corresponds to 5 to 10
  p3 <- sum(Pr[11:20])    # corresponds to 11 to 20
  p4 <- 1-sum(Pr[1:20])   # corresponds to 21 and more 
  p <- cbind(p1,p2,p3,p4)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}


data1 <- read.csv2("Data/Rindviehbesitzer nach Kanton 1866-1966.csv",stringsAsFactors = FALSE)

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,19)
AVERAGE.matrix <- matrix(NA,25,19)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2)])
    # generate observation for each farm
    Y <- rep(4, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    Y[(data2[1] + data2[2]+1):(data2[1] + data2[2]+ data2[3])] <- 3
    
    # estimate parameters of underlying distribution
  
    startval <- c(10,0)
    out <- optim(startval,negbin.animal.size, y=Y, hessian = TRUE, 
                 method="L-BFGS-B",  lower=c(0.000000001, -Inf), upper=rep(Inf, 2),
                 control=list(fnscale=-1,trace=15))
    
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR
GINI.matrix.cow <- GINI.matrix
save(GINI.matrix, file="Data/Gini.Cows.RData")


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FARMS

negbin.animal.size.farm <- function(theta,counter){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,60)
  for (i in 1:60){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a)/(1-gamma(a)/(gamma(a))*(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1])        # corresponds to 1-5 animals
  p2 <- sum(Pr[2])       # corresponds to 6 to 25
  p3 <- sum(Pr[3:6])       # corresponds to 6 to 25
  p4 <- sum(Pr[7:10])       # corresponds to 6 to 25
  p5 <- sum(Pr[11:20])       # corresponds to 6 to 25
  p6 <- sum(Pr[21:30])       # corresponds to 6 to 25
  p7 <- sum(Pr[31:40])       # corresponds to 6 to 25
  p8 <- sum(Pr[41:60])       # corresponds to 6 to 25
  p9 <- 1-sum(Pr[1:60])   # corresponds to 26 and more 
  p <- cbind(p1,p2,p3,p4,p5,p6,p7,p8,p9)
  P <- p[counter]
  ll <- sum(log(P))
  return(ll)    
}

data1 <- read.csv2("Data/Betriebe nach der Grösse der Kulturfläche und Kantonen.csv",stringsAsFactors = FALSE, sep=",")

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,10)
AVERAGE.matrix <- matrix(NA,25,10)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2,3,5)])
    # generate observation for each farm
    Y <- rep(9, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    Y[(data2[1] + data2[2]+1):(data2[1] + data2[2]+ data2[3])] <- 3
    Y[(data2[1] + data2[2] + data2[3] +1):(data2[1] + data2[2]+ data2[3]+ data2[4])] <- 4
    Y[(data2[1] + data2[2] + data2[3] + data2[4] +1):(data2[1] + data2[2]+ data2[3]+ data2[4]+ data2[5])] <- 5
    Y[(data2[1] + data2[2] + data2[3] + data2[4]  + data2[5] +1):(data2[1] + data2[2]+ data2[3]+ data2[4]+ data2[5] + data2[6])] <- 6
    Y[(data2[1] + data2[2] + data2[3] + data2[4]  + data2[5] + data2[6] +1):(data2[1] + data2[2]+ data2[3]+ data2[4]+ data2[5] + data2[6]+ data2[7])] <- 7
    Y[(data2[1] + data2[2] + data2[3] + data2[4]  + data2[5] + data2[6] + data2[7] +1):(data2[1] + data2[2]+ data2[3]+ data2[4]+ data2[5] + data2[6]+ data2[7]+ data2[8] )] <- 8
    Y[(data2[1] + data2[2] + data2[3] + data2[4]  + data2[5] + data2[6] + data2[7] + data2[8] +1):(data2[1] + data2[2]+ data2[3]+ data2[4]+ data2[5] + data2[6]+ data2[7]+ data2[8]  + data2[9])] <- 9
    
    # estimate parameters of underlying distribution
    startval <- c(10,0.1)
    tryCatch({
      out <- optim(startval,negbin.animal.size.farm, counter=Y, hessian = TRUE, 
                   method="L-BFGS-B",  lower=c(0.00001, -Inf), upper=rep(Inf, 2),
                   control=list(fnscale=-1,trace=1))},
      error=function(e){})
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR

GINI.matrix.farms <- GINI.matrix
save(GINI.matrix.farms, file="Data/Gini.Farms.RData")

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# PIGS

# see HTML document "Inequality_distribution.html" for details
negbin.animal.size.pigs <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,10)
  for (i in 1:10){
    # G(x+n)/(G(n) x!) p^n (1-p)^x
    # i = x
    # B = p
    # a = n
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a)/(1-gamma(a)/(gamma(a))*(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1:3])        # corresponds to 1-3 animals
  p2 <- sum(Pr[4:10])       # corresponds to 4 to 10
  p3 <- 1-sum(Pr[1:10])   # corresponds to 11 and more 
  p <- cbind(p1,p2,p3)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}

data1 <- read.csv2("Data/Schweinebesitzer nach Kanton 1866-1966.csv",stringsAsFactors = FALSE)

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,19)
AVERAGE.matrix <- matrix(NA,25,19)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2,6,7)])
    # generate observation for each farm
    Y <- rep(3, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    
    # estimate parameters of underlying distribution
    startval <- c(10,0)
    out <- optim(startval,negbin.animal.size.pigs, y=Y, hessian = TRUE, 
                 method="SANN",  control=list(fnscale=-1,trace=1,maxit=1000))#},
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR

GINI.matrix.pigs <- GINI.matrix
save(GINI.matrix.pigs, file="Data/Gini.Pigs.RData")

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# SHEEP

negbin.animal.size.sh <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,25)
  for (i in 1:25){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a)/(1-gamma(a)/(gamma(a))*(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1:5])        # corresponds to 1-5 animals
  p2 <- sum(Pr[6:25])       # corresponds to 6 to 25
  p3 <- 1-sum(Pr[1:25])   # corresponds to 26 and more 
  p <- cbind(p1,p2,p3)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}

data1 <- read.csv2("Data/Schafbesitzer nach Kanton 1866-1966.csv",stringsAsFactors = FALSE)

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,19)
AVERAGE.matrix <- matrix(NA,25,19)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2)])
    # generate observation for each farm
    Y <- rep(3, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    
    # estimate parameters of underlying distribution
    startval <- c(10,0)
    out <- optim(startval,negbin.animal.size.sh, y=Y, hessian = TRUE, 
                 method="SANN",  control=list(fnscale=-1,trace=1,maxit=1000))
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR

GINI.matrix.sheep <- GINI.matrix
save(GINI.matrix.sheep, file="Data/Gini.Sheep.RData")

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# GOATS

negbin.animal.size.gt <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,25)
  for (i in 1:25){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a)/(1-gamma(a)/(gamma(a))*(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1:5])        # corresponds to 1-5 animals
  p2 <- sum(Pr[6:25])       # corresponds to 6 to 25
  p3 <- 1-sum(Pr[1:25])   # corresponds to 26 and more 
  p <- cbind(p1,p2,p3)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}

data1 <- read.csv2("Data/Ziegenbesitzer nach Kanton 1866-1966.csv",stringsAsFactors = FALSE)

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,19)
AVERAGE.matrix <- matrix(NA,25,19)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2)])
    # generate observation for each farm
    Y <- rep(3, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    
    # estimate parameters of underlying distribution
    startval <- c(10,0)
    tryCatch({
      out <- optim(startval,negbin.animal.size.gt, y=Y, hessian = TRUE, 
                   method="SANN",  control=list(fnscale=-1,trace=1,maxit=1000))},
      error=function(e){})
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR

GINI.matrix.goats <- GINI.matrix
save(GINI.matrix.goats, file="Data/Gini.Goats.RData")


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# HORSE

negbin.animal.size.horse <- function(theta,y){
  a <- (theta[1])
  b <- theta[2]
  B <- 1/(1+exp(-b))
  Pr <- rep(NA,25)
  for (i in 1:3){
    Pr[i] <- gamma(a+i)/(gamma(i+1)*gamma(a))*(1-B)^i*B^(a)/(1-gamma(a)/(gamma(a))*(B)^a)
    # truncated negative binomial distribution, Sampford (1958)  
  }
  p1 <- sum(Pr[1])        # corresponds to 1 animals
  p2 <- sum(Pr[2])       # corresponds to 2
  p3 <- 1-sum(Pr[1:2])   # corresponds to 3 and more 
  p <- cbind(p1,p2,p3)
  P <- p[y]
  ll <- sum(log(P))
  return(ll)    
}

data1 <- read.csv2("Data/Besitzer von Pferden, Maultieren und Eseln Mix Lucas.csv",stringsAsFactors = FALSE,sep = ",")

data1.sort <- data.frame(matrix(NA,dim(data1)[1],dim(data1)[2]))

CANTON <- c("ZH", "BE", "LU", "UR", "SZ", "OW", "NW", "GL", "ZG", "FR", "SO", "BS", "BL", "SH", "AR", "AI", "SG", "GR", "AG", "TG", "TI", "VD", "VS", "NE", "GE")
YEAR <- unique(data1$year)

r1 <- 1

for (c in CANTON){
  data.x <- data1[data1$canton==c,]
  data1.sort[c(r1:(r1+18)),] <- data.x
  r1 <- r1+19
}
cN <- colnames(data1)
data1 <- data1.sort
colnames(data1) <- cN

CANTON <- unique(as.character(data1$canton))
YEAR <- unique(data1$year)
length(YEAR)

GINI.matrix <- matrix(NA,25,19)
AVERAGE.matrix <- matrix(NA,25,19)

for (c in CANTON){
  for (y in YEAR){
    # Picking adequate row of raw data
    yy <- which(data1$year==y)
    cc <- which(data1$canton==c)
    rowL <- intersect(yy,cc)
    data2 <- as.numeric(data1[rowL,-c(1,2)])
    # generate observation for each farm
    Y <- rep(3, sum(data2))
    Y[1:data2[1]] <- 1
    Y[(data2[1]+1):(data2[1] + data2[2])] <- 2
    
    # estimate parameters of underlying distribution
    startval <- c(10,0)
    tryCatch({
      out <- optim(startval,negbin.animal.size.horse, y=Y, hessian = TRUE, 
                   method="SANN",  control=list(fnscale=-1,trace=1,maxit=1000))},
      error=function(e){})
    # Generate 1 mio simulations and determine Gini coefficient
    theta <- out$par
    cows <- rnbinom(10000000,p=1/(1+exp(-theta[2])),abs(theta[1]))
    cows <- cows[-which(cows==0)]
    summary(cows)
    giniL <- ineq(cows,type="Gini")
    GINI.matrix[which(CANTON==c),which(YEAR==y)] <- giniL
    AVERAGE.matrix[which(CANTON==c),which(YEAR==y)] <- mean(Y)
  }
}

rownames(GINI.matrix) <- CANTON
colnames(GINI.matrix) <- YEAR

GINI.matrix.horses <- GINI.matrix
save(GINI.matrix.horses, file="Data/Gini.Horse.RData")



#########################################################################################################
#########################################################################################################
#  Figure A2 in Appendix
#########################################################################################################
#########################################################################################################

load("Data/Gini.Cows.RData")
load("Data/Gini.Horse.RData")
load("Data/Gini.Pigs.RData")
load("Data/Gini.Sheep.RData")
load("Data/Gini.Goats.RData")
load("Data/Gini.Farms.RData")

FARMS <- GINI.matrix.farms[,c(1:4,6)]
COWS <- GINI.matrix[,c(c(6,12,14,17,19))]
HORSES <- GINI.matrix.horses[,c(c(6,12,14,17,19))]
PIGS <- GINI.matrix.pigs[,c(c(6,12,14,17,19))]
SHEEP <- GINI.matrix.sheep[,c(c(6,12,14,17,19))]
GOATS <- GINI.matrix.goats[,c(c(6,12,14,17,19))]

data.f <- melt(FARMS)
data.c <- melt(COWS)
data.h <- melt(HORSES)
data.p <- melt(PIGS)
data.g <- melt(GOATS)
data.s <- melt(SHEEP)

data.a <- cbind(data.f,data.c,data.h,data.p,data.g,data.s)
colnames(data.a)[c(1,3,6,9,12,15,18)] <- c("canton","FarmGini","CowGini","HorseGini","PigGini","GoatGini","SheepGini")

fit1 <- princomp(data.a[,c(3,6,9,12,15,18)], cor=TRUE)
summary(fit1)
plot(fit1, type="lines")
biplot(fit1)






#########################################################################################################
#########################################################################################################
#  Table A3 in Appendix
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod4 <- lm(dir_pc~factor(year) + factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+hddi*nonagr+ 
             left + ineqcow+ fed_pc + printed.records, data=datwint)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod19 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)  + factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek +
             maj*nonagr+hddi*nonagr+ left + ineqcow+ fed_pc+ printed.records, data=datwint)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod21 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)  + factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek + 
             dominance*nonagr+hddi*nonagr+ left + ineqcow+ fed_pc+ printed.records, data=datwint)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod22 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

# Maybe drop PR?
screenreg(list(mod19,mod21,mod22),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), reorder.coef = c(5,13,15,3,6,11,14,16,12,1,2,4,7,8,9,10))



texreg(list(mod19,mod21,mod22),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 5","Model 9","Model 10"), reorder.coef = c(5,13,15,3,6,11,14,16,12,1,2,4,7,8,9,10),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Direct Democracy}", "\\sc{Suffrage Restirctions}", 
                            "\\sc{Political Competition}","\\sc{Share Non-Agricultural Sector}", "\\sc{Share Left}","\\sc{Rural Inequality}", 
                            "\\sc{Federal Payments (p.c.)}", "\\sc{Printed Documents}",
                            "\\sc{Pol. Competition$\\times$  \\% Non-Agr}", "\\sc{Direct Democracy$\\times$ \\% Non-Agr}", 
                            "\\sc{Exec. Maj.}", "\\sc{Exec. Maj.$\\times$ \\% Non-Agr}", "\\sc{Exec. Dominance}",
                            "\\sc{Exec. Dominance$\\times$ \\% Non-Agr}"))




#########################################################################################################
#########################################################################################################
#  Table A4 in Appendix
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod4 <- lm(dir_pc~factor(year)+ factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition + hddi*nonagr+ left + ineqcow+ fed_pc + printed.records, data=datwintoLG)
summary(mod4)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod13 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ left + ineqcow+ fed_pc+ printed.records, data=datwintoLG)
summary(mod4)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod14 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year) +factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ hddi*nonagr+ left + ineqcow+ fed_pc+ printed.records, data=datwintoLG)
summary(mod4)
r.sq <- c(r.sq, summary(mod4)$r.squared)
mod14.1 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

screenreg(list(mod14,mod13,mod14.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), reorder.coef = c(5,3,6,11,12,1,2,4,7,8,9,10))

texreg(list(mod14,mod13,mod14.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 6","Model 7","Model 8"),reorder.coef = c(5,3,6,11,12,1,2,4,7,8,9,10),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Direct Democracy}", "\\sc{Suffrage Restirctions}", 
                            "\\sc{Political Competition}","\\sc{Share Non-Agricultural Sector}", "\\sc{Share Left}", "\\sc{Rural Inequality}", 
                            "\\sc{Federal Payments (p.c.)}","\\sc{Printed Documents}" ,"\\sc{Pol. Competition$\\times$  \\% Non-Agr}",
                            "\\sc{Direct Democracy$\\times$ \\% Non-Agr}"))





#########################################################################################################
#########################################################################################################
#  Table A5 in Appendix
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod4 <- lm(dir_pc~ factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi + nonagr+ competition + left + ineqcow + fed_pc + printed.records  , data=datwint[datwint$year<1890,])
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod6 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(canton)+factor(year)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+ left + ineqcow+ fed_pc + printed.records  , data=datwint[datwint$year<1890,])
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod7 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+competition + hddi*nonagr+  left + ineqcow+ fed_pc + printed.records  , data=datwint[datwint$year<1890,])
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+competition*nonagr +hddi*nonagr+ left + ineqcow + fed_pc + printed.records   , data=datwint[datwint$year<1890,])
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8.1 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))


screenreg(list(mod6,mod7,mod8,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), reorder.coef = c(6,3,5,11,12,1,2,4,7,8,9,10))

texreg(list(mod6,mod7,mod8,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 11","Model 12","Model 13","Model 14"), reorder.coef = c(6,3,5,11,12,1,2,4,7,8,9,10),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Direct Democracy}", "\\sc{Suffrage Restrictions}", 
                            "\\sc{Share Non-Agricultural Sector}","\\sc{Political Competition}", "\\sc{Share Left}","\\sc{Rural Inequality}", 
                            "\\sc{Federal Payments (p.c.)}","\\sc{Printed Documents}","\\sc{Pol. Competition $\\times$  \\% Non-Agr}",
                            "\\sc{Direct Democracy$\\times$ \\% Non-Agr}"))

round(r.sq,3)



#########################################################################################################
#########################################################################################################
#  Table A6 in Appendix
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod4 <- lm(dir_pc~factor(year) + factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi + nonagr+ competition + left + ineqcow+ fed_pc + printed.records + poverty , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod5 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~ factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi*nonagr+ competition + left + ineqcow + fed_pc + printed.records + poverty , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod6 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+  left + ineqcow+ fed_pc + printed.records+ poverty  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+competition*nonagr +hddi*nonagr+ left + ineqcow + fed_pc + printed.records + poverty  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8.1 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

screenreg(list(mod5,mod8,mod6,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), reorder.coef = c(6,3,5,12,13,1,2,4,7,8,9,10,11))

texreg(list(mod5,mod8,mod6,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 15","Model 16","Model 17","Model 18"), reorder.coef = c(6,3,5,12,13,1,2,4,7,8,9,10,11),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Direct Democracy}", "\\sc{Suffrage Restrictions}", 
                            "\\sc{Share Non-Agricultural Sector}","\\sc{Political Competition}", "\\sc{Share Left}","\\sc{Rural Inequality}", 
                            "\\sc{Federal Payments (p.c.)}","\\sc{Printed Documents}","\\sc{Poverty}","\\sc{Pol. Competition $\\times$  \\% Non-Agr}",
                            "\\sc{Direct Democracy$\\times$ \\% Non-Agr}"))

round(r.sq,3)




#########################################################################################################
#########################################################################################################
#  Table A7 in Appendix
#########################################################################################################
#########################################################################################################

r.sq <- c()

mod4 <- lm(dir_pc~ factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+hddi*nonagr+ competition + 
             left + ineqcow +spatialL_dir_pc+ fed_pc + printed.records, data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod23 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+  
             left + ineqcow+spatialL_dir_pc+ fed_pc+ printed.records, data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod24 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(dir_pc~factor(year)+factor(canton)+ldir_pc + def_pc + hddi+suffoek+competition*nonagr +
             hddi*nonagr+ left + ineqcow +spatialL_dir_pc+ fed_pc+ printed.records, data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod25 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

screenreg(list(mod24,mod23,mod25),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1),reorder.coef = c(6,3,5,12,13,1,2,4,7,8,10,11,9))


texreg(list(mod24,mod23,mod25),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1), digits=2, 
       custom.model.names=c("Model 19","Model 20","Model 21"),reorder.coef = c(6,3,5,12,13,1,2,4,7,8,10,11,9),
       custom.coef.names =c("\\sc{Lag DV}","\\sc{Deficit p.c.}","\\sc{Direct Democracy}", "\\sc{Suffrage Restirctions}", 
                            "\\sc{Share Non-Agricultural Sector}","\\sc{Political Competition}", "\\sc{Share Left}","\\sc{Rural Inequality}", 
                            "\\sc{Spatial Lag}","\\sc{Federal Payments (p.c.)}" , "\\sc{Printed Documents}",
                            "\\sc{Pol. Competition$\\times$  \\% Non-Agr}","\\sc{Direct Democracy$\\times$ \\% Non-Agr}"))



#########################################################################################################
#########################################################################################################
#  Table A8 in Appendix
#########################################################################################################
#########################################################################################################

datwint$y2 <- datwint$year
datwint <- datwint %>% group_by(canton) %>% mutate(ltot_tax_pc = lag(tot_tax_pc))
datwint <- data.frame(datwint)

r.sq <- c()

mod4 <- lm(tot_tax_pc~factor(canton)+factor(year)+ltot_tax_pc + def_pc + hddi+suffoek+hddi + nonagr+ competition + left + ineqcow+ fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod5 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(tot_tax_pc~factor(year)+factor(canton)+ltot_tax_pc + def_pc + hddi+suffoek+hddi*nonagr+ competition + left + ineqcow + fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod6 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(tot_tax_pc~factor(year)+factor(canton)+ltot_tax_pc + def_pc +hddi+suffoek+hddi + competition*nonagr+  left + ineqcow+ fed_pc + printed.records  , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

mod4 <- lm(tot_tax_pc~factor(year)+factor(canton)+ltot_tax_pc + def_pc + hddi+suffoek+competition*nonagr +hddi*nonagr+ left + ineqcow + fed_pc + printed.records   , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod8.1 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))
nobs(mod4)

screenreg(list(mod5,mod8,mod6,mod8.1),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1))





#########################################################################################################
#########################################################################################################
#  Table A9 in Appendix
#########################################################################################################
#########################################################################################################

mod4 <- lm(edu_pc~factor(year)+factor(canton)+tot_tax_pc + ledu_pc+ def_pc+ babydeathratio_impute+ fed_pc , data=datwint)
r.sq <- c(r.sq,summary(mod4)$r.squared)
mod28 <-  coeftest(mod4, vcov = vcovHAC(mod4, type="HC3"))

screenreg(list(mod28),omit.coef="factor|Intercept|year", stars = c( 0.01, 0.05, 0.1))














